% no_calibration_metric
%
% Runs the demonstrations and creates the figures described in:
%   Morrison G.S. (2021). In the context of forensic casework, are there meaningful metrics of the degree of calibration?
%   Forensic Science International: Synergy, 3, 100157. https://doi.org/10.1016/j.fsisyn.2021.100157
%
% tested on Matlab R2020b

% Start from a clean state: clear the workspace, close all open figures and
% clear the command window.
clear all;
close all;
clc;
% Put ./functions/ on the MATLAB path (it is removed again by the rmpath
% call at the end of this script).
addpath('./functions/');
% Seed the random number generator with a fixed value.
% NOTE(review): the Monte Carlo loop further down uses parfor; whether this
% seed also governs the draws made on parallel workers should be confirmed.
rng(0);

%% Fig 2
% (a): Monte Carlo population distributions.
% (b): Score-to-likelihood-ratio mapping function corresponding to (a).
% (c): Distributions of (a) after transformation using mapping function of (b). 
% (d): Score-to-likelihood-ratio mapping function corresponding to (c).

% plotting range
% x_plot is a fixed-size grid spanning [x_min_plot, x_max_plot]. linspace is
% used rather than min:step:max because the step (20/299) is not an integer:
% with the colon operator, the number of points and whether the last point
% equals x_max_plot depend on floating-point rounding, whereas linspace
% always returns exactly num_plot_points values including both end points.
x_max_plot = 10;
x_min_plot = - x_max_plot;
x_min_max_plot = [x_min_plot, x_max_plot];
num_plot_points = 300;
x_plot_step = (x_max_plot - x_min_plot) / (num_plot_points - 1); % nominal grid spacing
x_plot = linspace(x_min_plot, x_max_plot, num_plot_points);


% Fig 2(a): Monte Carlo population distributions.
% Same-source (s) and different-source (d) score distributions are Gaussian
% with a common standard deviation.
mu_s = 6;
mu_d = 3;
sigma = 1;

sigma2 = sigma^2;
% densities evaluated on the plotting grid
L_s = normpdf(x_plot, mu_s, sigma);
L_d = normpdf(x_plot, mu_d, sigma);

% densities at the means (heights of the vertical marker lines in the plot)
L_mu_s = normpdf(mu_s, mu_s, sigma);
L_mu_d = normpdf(mu_d, mu_d, sigma);


% Fig 2(b): Score-to-likelihood-ratio mapping function corresponding to (a).
% Linear mapping ln(LR) = a + b*score.

% Eq 3: slope
b = (mu_s - mu_d) / sigma2;
% Eq 4: intercept
a = -b*(mu_s + mu_d)/2;
% Eq 2: mapping evaluated at the two ends of the plotting range (a straight
% line only needs its two end points)
y_min_max_plot = a + b*x_min_max_plot;


% Fig 2(c): Distributions of (a) after transformation using mapping function of (b).
% Under y = a + b*x the standard deviation becomes b*sigma, and the
% same-source mean a + b*mu_s = b*(mu_s - mu_d)/2 equals sigma_calibrated^2/2
% (because mu_s - mu_d = b*sigma^2). The different-source mean is its negative.

sigma_calibrated = b*sigma;
sigma2_calibrated = sigma_calibrated^2;
mu_s_calibrated = sigma2_calibrated/2;
mu_d_calibrated = -mu_s_calibrated;

L_s_calibrated = normpdf(x_plot, mu_s_calibrated, sigma_calibrated);
L_d_calibrated = normpdf(x_plot, mu_d_calibrated, sigma_calibrated);

L_mu_s_calibrated = normpdf(mu_s_calibrated, mu_s_calibrated, sigma_calibrated);
L_mu_d_calibrated = normpdf(mu_d_calibrated, mu_d_calibrated, sigma_calibrated);


% Fig 2(d): Score-to-likelihood-ratio mapping function corresponding to (c).
% Applying Eqs 3 and 4 to the already-transformed distributions gives slope 1
% and intercept 0, i.e. the identity mapping.

% Eq 3
b_calibrated = (mu_s_calibrated - mu_d_calibrated) / sigma2_calibrated;
% Eq 4
a_calibrated = -b_calibrated * (mu_s_calibrated + mu_d_calibrated)/2;
% Eq 2
y_min_max_plot_calibrated = a_calibrated + b_calibrated*x_min_max_plot;

 
% Plot Fig 2
% The four panels sit on a 2x2 subplot grid, which subplot numbers row by
% row. plot_letter is therefore ordered (a), (c), (b), (d): the density
% panels (a) and (c) form the top row and the mapping panels (b) and (d) the
% bottom row.
h_fig = figure(2);
fig_position = get(h_fig, 'Position');
fig_ratio = [2 2];
% new width and height are both multiples of the current figure height
fig_position_new = [200, 200, fig_position(4)*fig_ratio];
set(h_fig, 'Position',fig_position_new);
plot_letter = {'(a)', '(c)', '(b)', '(d)'};
plot_line_width = 1;
plot_font_size = 12;
plot_pdf_y_lim = [0 0.45];
plot_x_ticks = x_min_plot : 2 : x_max_plot;

for I_plot = 1:4
    h_ax(I_plot) = subplot(2,2,I_plot);

    switch I_plot
        case {1, 2}
            % Density panels.
            if I_plot == 1
                % Fig 2(a): Monte Carlo population distributions.
                panel_pdf_s = L_s;
                panel_pdf_d = L_d;
                panel_mu_s = mu_s;
                panel_mu_d = mu_d;
                panel_peak_s = L_mu_s;
                panel_peak_d = L_mu_d;
            else
                % Fig 2(c): Distributions of (a) after transformation using
                % mapping function of (b).
                panel_pdf_s = L_s_calibrated;
                panel_pdf_d = L_d_calibrated;
                panel_mu_s = mu_s_calibrated;
                panel_mu_d = mu_d_calibrated;
                panel_peak_s = L_mu_s_calibrated;
                panel_peak_d = L_mu_d_calibrated;
            end

            % thin vertical reference line at x = 0
            plot([0, 0], plot_pdf_y_lim, 'k', 'LineWidth',plot_line_width/2);
            hold on
            % same-source density in blue, different-source density in red
            plot(x_plot, panel_pdf_s, 'b', 'LineWidth',plot_line_width);
            plot(x_plot, panel_pdf_d, 'r', 'LineWidth',plot_line_width);
            % vertical markers from the axis up to each density at its mean
            plot([panel_mu_s, panel_mu_s], [0 panel_peak_s], 'k', 'LineWidth',plot_line_width);
            plot([panel_mu_d, panel_mu_d], [0 panel_peak_d], 'k', 'LineWidth',plot_line_width);

            if I_plot == 1
                % left-hand panel carries the y tick labels and the y label
                set(gca, 'XTickLabels',[], 'YLim',plot_pdf_y_lim);
                ylabel('probability density', 'FontSize',plot_font_size);
            else
                set(gca, 'XTickLabels',[], 'YTickLabels',[], 'YLim',plot_pdf_y_lim);
            end
            pbaspect([2 1 1]);
            text(0.025, 0.9, plot_letter{I_plot}, 'FontSize',plot_font_size, 'Units','Normalized');

        case {3, 4}
            % Mapping-function panels.
            if I_plot == 3
                % Fig 2(b): Score-to-likelihood-ratio mapping function
                % corresponding to (a).
                panel_mapping = y_min_max_plot;
                panel_x_label = 'score';
            else
                % Fig 2(d): Score-to-likelihood-ratio mapping function
                % corresponding to (c).
                panel_mapping = y_min_max_plot_calibrated;
                panel_x_label = 'ln(LR)';
            end

            % thin horizontal and vertical reference lines through the origin
            plot(x_min_max_plot', [0 0], 'k', 'LineWidth',plot_line_width/2);
            hold on
            plot([0 0], x_min_max_plot', 'k', 'LineWidth',plot_line_width/2);
            % the mapping itself: a straight line through its two end points
            plot(x_min_max_plot', panel_mapping', 'g', 'LineWidth',plot_line_width);
            axis square

            if I_plot == 3
                set(gca, 'YLim',x_min_max_plot, 'YTick', plot_x_ticks);
            else
                set(gca, 'YLim',x_min_max_plot, 'YTick', plot_x_ticks, 'YTickLabels',[]);
            end
            xlabel(panel_x_label, 'FontSize',plot_font_size);
            if I_plot == 3
                ylabel('ln(LR)', 'FontSize',plot_font_size);
            end
            text(0.025, 0.95, plot_letter{I_plot}, 'FontSize',plot_font_size, 'Units','Normalized');
    end

    % settings shared by all four panels
    xlim(x_min_max_plot);
    xticks(plot_x_ticks);
    grid on

    % enlarge the axes by ax_ratio about its centre
    ax_ratio = 1.2;
    ax_position = get(h_ax(I_plot),'Position');
    ax_position_new = [ax_position(1:2) - ax_position(3:4)*(ax_ratio-1)/2, ax_position(3:4)*ax_ratio];
    set(h_ax(I_plot), 'Position',ax_position_new);

    hold off
end


%% Fig 3
% Calibrated distributions for sigma = 1, 2, 3, 4, one subplot per value.
% For each sigma the same-source mean is sigma^2/2 and the different-source
% mean is its negative.

sigma_row = [1 2 3 4];
mu_s_row = sigma_row.^2/2;
mu_d_row = -mu_s_row;

num_row = length(sigma_row);

h_fig = figure(3);
fig_position = get(h_fig, 'Position');
fig_ratio = [1 2.25];
% scale the current width and height by fig_ratio
fig_position_new = [200, 200, fig_position(3:4).*fig_ratio];
set(h_fig, 'Position',fig_position_new);

plot_letter = {'\sigma = 1', '\sigma = 2', '\sigma = 3', '\sigma = 4'};

for I_row = 1:num_row
    h_ax_3(I_row) = subplot(num_row,1,I_row);

    % parameters of the current row
    sigma_temp = sigma_row(I_row);
    mu_s_temp = mu_s_row(I_row);
    mu_d_temp = mu_d_row(I_row);

    % densities on the plotting grid and at the two means
    L_s_temp = normpdf(x_plot, mu_s_temp, sigma_temp);
    L_d_temp = normpdf(x_plot, mu_d_temp, sigma_temp);
    L_mu_s_temp = normpdf(mu_s_temp, mu_s_temp, sigma_temp);
    L_mu_d_temp = normpdf(mu_d_temp, mu_d_temp, sigma_temp);

    % thin vertical reference line at x = 0, then the two densities
    % (same-source blue, different-source red) and markers at their means
    plot([0, 0], plot_pdf_y_lim, 'k', 'LineWidth',plot_line_width/2);
    hold on
    plot(x_plot, L_s_temp, 'b', 'LineWidth',plot_line_width);
    plot(x_plot, L_d_temp, 'r', 'LineWidth',plot_line_width);
    plot([mu_s_temp, mu_s_temp], [0 L_mu_s_temp], 'k', 'LineWidth',plot_line_width);
    plot([mu_d_temp, mu_d_temp], [0 L_mu_d_temp], 'k', 'LineWidth',plot_line_width);

    ylabel('probability density', 'FontSize',plot_font_size);

    % only the bottom subplot shows x tick labels and an x label
    if I_row == num_row
        xlabel('calibrated ln(LR)', 'FontSize',plot_font_size);
    else
        set(gca, 'XTickLabels',[]);
    end

    xticks(plot_x_ticks);
    ylim(plot_pdf_y_lim);
    grid on
    pbaspect([2 1 1]);
    text(0.025, 0.9, plot_letter{I_row}, 'FontSize',plot_font_size, 'Units','Normalized');

    % enlarge the axes by ax_ratio about its centre
    ax_ratio = 1.2;
    ax_position = get(h_ax_3(I_row),'Position');
    ax_position_new = [ax_position(1:2) - ax_position(3:4)*(ax_ratio-1)/2, ax_position(3:4)*ax_ratio];
    set(h_ax_3(I_row), 'Position',ax_position_new);
end


%% Fig 4 Gumbel distribution
% Same layout as Fig 2(a), but the same-source density is replaced by the
% 'Generalized Extreme Value' pdf (parameters 0, sigma, 0) evaluated at
% mu_s - x. The different-source density is the Gaussian L_d computed for
% Fig 2(a).

gev_family = 'Generalized Extreme Value';
L_s_Gumbel = pdf(gev_family, mu_s-x_plot, 0, sigma, 0);
% density at x = mu_s (argument mu_s - x = 0): height of the marker line
L_mu_s_Gumbel = pdf(gev_family, 0, 0, sigma, 0);

% Plot Fig 4
h_fig = figure(4);
fig_position = get(h_fig, 'Position');
fig_ratio = [1.05 0.6];
% new width and height are both multiples of the current figure height
fig_position_new = [200, 200, fig_position(4)*fig_ratio];
set(h_fig, 'Position',fig_position_new);

% thin vertical reference line at x = 0
plot([0, 0], plot_pdf_y_lim, 'k', 'LineWidth',plot_line_width/2);
hold on
% same-source density in blue, different-source density in red
plot(x_plot, L_s_Gumbel, 'b', 'LineWidth',plot_line_width);
plot(x_plot, L_d, 'r', 'LineWidth',plot_line_width);
% vertical markers at mu_s and mu_d
plot([mu_s, mu_s], [0 L_mu_s_Gumbel], 'k', 'LineWidth',plot_line_width);
plot([mu_d, mu_d], [0 L_mu_d], 'k', 'LineWidth',plot_line_width);
xticks(plot_x_ticks);
ylim(plot_pdf_y_lim);
ylabel('probability density', 'FontSize',plot_font_size);
xlabel('score', 'FontSize',plot_font_size);
grid on
pbaspect([2 1 1]);


%% Figs 5–7: Monte Carlo simulations
% Each iteration draws fresh same-source and different-source scores,
% calibrates them with LDA and with logistic regression, recalibrates the
% resulting ln(LR) values (LDA / logistic regression / PAV) and stores Cllr
% and devPAV-type values. Every result array is num_iter-by-2: column 1 is
% the Gaussian same-source condition, column 2 the Gumbel same-source
% condition.
% NOTE(review): train_LDA, cllr, pav_transform, devPAV, train_llr_fusion and
% lin_fusion are not defined in this file; presumably they are provided by
% ./functions/ - their exact contracts should be confirmed there.

num_s = 50;
% equals num_s*(num_s-1)/2 (1225 for num_s = 50)
num_d = (num_s^2 - num_s) / 2;

num_iter = 10000; % number of Monte Carlo iterations

% Preallocate all result arrays with NaN (num_iter rows, one column per
% same-source distribution type).
Cllr_LDA = NaN(num_iter, 2);
Cllr_LDA_LDA = Cllr_LDA;
Cllr_LDA_PAV = Cllr_LDA;
Cllr_LogReg = Cllr_LDA;
Cllr_LogReg_LogReg = Cllr_LDA;
Cllr_LogReg_PAV = Cllr_LDA;
LDA_devPAV = Cllr_LDA;
LogReg_devPAV = Cllr_LDA;
devLDA = Cllr_LDA;
devLogReg = Cllr_LDA;

parfor I_iter = 1:num_iter
    
    % Gaussian different-source
    % Two columns are drawn: column 1 is used below for training and
    % column 2 for testing. The same different-source draw is shared by
    % both same-source distribution types within an iteration.
    x_d = random('Normal', mu_d, sigma, [num_d 2]);
    
    for I_dist_type = 1:2
        if I_dist_type == 1
            % Gaussian same-source
            x_s = random('Normal', mu_s, sigma, [num_s 2]);
        else
            % Gumbel same-source
            % mu_s minus a 'Generalized Extreme Value' draw with parameters
            % (0, sigma, 0)
            x_s = mu_s - random('Generalized Extreme Value', 0, sigma, 0, [num_s 2]);
        end

        % LDA
        % Train intercept and slope on column 1, apply the linear mapping to
        % column 2, and score the resulting ln(LR) values.
        [a_LDA, b_LDA] = train_LDA(x_s(:,1), x_d(:,1));
        ln_LR_s_LDA = a_LDA + b_LDA*x_s(:,2);
        ln_LR_d_LDA = a_LDA + b_LDA*x_d(:,2);
        Cllr_LDA(I_iter, I_dist_type) = cllr(ln_LR_s_LDA, ln_LR_d_LDA);
        
        % LDA-LDA
        % Recalibration: a second LDA mapping is trained on, and applied to,
        % the same ln(LR) values produced above.
        [a_LDA_LDA, b_LDA_LDA] = train_LDA(ln_LR_s_LDA, ln_LR_d_LDA);
        ln_LR_s_LDA_LDA = a_LDA_LDA + b_LDA_LDA*ln_LR_s_LDA;
        ln_LR_d_LDA_LDA = a_LDA_LDA + b_LDA_LDA*ln_LR_d_LDA;
        Cllr_LDA_LDA(I_iter, I_dist_type) = cllr(ln_LR_s_LDA_LDA, ln_LR_d_LDA_LDA);

        % LDA-PAV
        % The PAV output is split on the assumption that its first num_s
        % elements correspond to the same-source inputs and the remainder to
        % the different-source inputs - TODO confirm against pav_transform.
        ln_LR_LDA_PAV = pav_transform([], ln_LR_s_LDA, ln_LR_d_LDA);
        ln_LR_s_LDA_PAV = ln_LR_LDA_PAV(1:num_s);
        ln_LR_d_LDA_PAV = ln_LR_LDA_PAV(num_s+1:end);
        Cllr_LDA_PAV(I_iter, I_dist_type) = cllr(ln_LR_s_LDA_PAV, ln_LR_d_LDA_PAV);
        
        % LDA-devPAV
        % Only values between the smallest same-source ln(LR) and the
        % largest different-source ln(LR) are passed to devPAV.
        range_min = min(ln_LR_s_LDA);
        range_max = max(ln_LR_d_LDA);
        ln_LR_LDA = [ln_LR_s_LDA; ln_LR_d_LDA];
        II_range = (ln_LR_LDA >= range_min) & (ln_LR_LDA <= range_max);
        LDA_devPAV(I_iter, I_dist_type) = devPAV(ln_LR_LDA(II_range), ln_LR_LDA_PAV(II_range));
        % devLDA is an analogue of devPAV
        % (same function and range, with the LDA-recalibrated values in
        % place of the PAV-recalibrated values)
        ln_LR_LDA_LDA = [ln_LR_s_LDA_LDA; ln_LR_d_LDA_LDA];
        devLDA(I_iter, I_dist_type) = devPAV(ln_LR_LDA(II_range), ln_LR_LDA_LDA(II_range));
            
        % LogReg
        % Same train-on-column-1 / test-on-column-2 scheme as for LDA; the
        % score vectors are transposed before being passed to the fusion
        % functions.
        % NOTE(review): the third argument 0.5 is presumably a prior -
        % confirm against train_llr_fusion.
        w_LogReg = train_llr_fusion(x_s(:,1)', x_d(:,1)', 0.5);
        ln_LR_s_LogReg = lin_fusion(w_LogReg, x_s(:,2)');
        ln_LR_d_LogReg = lin_fusion(w_LogReg, x_d(:,2)');
        Cllr_LogReg(I_iter, I_dist_type) = cllr(ln_LR_s_LogReg, ln_LR_d_LogReg);

        % LogReg-LogReg
        % Recalibration trained on, and applied to, the same ln(LR) values.
        w_LogReg_LogReg = train_llr_fusion(ln_LR_s_LogReg, ln_LR_d_LogReg, 0.5);
        ln_LR_s_LogReg_LogReg = lin_fusion(w_LogReg_LogReg, ln_LR_s_LogReg);
        ln_LR_d_LogReg_LogReg = lin_fusion(w_LogReg_LogReg, ln_LR_d_LogReg);
        Cllr_LogReg_LogReg(I_iter, I_dist_type) = cllr(ln_LR_s_LogReg_LogReg, ln_LR_d_LogReg_LogReg);

        % LogReg-PAV
        % Split of the PAV output as in the LDA-PAV case above.
        ln_LR_LogReg_PAV = pav_transform([], ln_LR_s_LogReg, ln_LR_d_LogReg);
        ln_LR_s_LogReg_PAV = ln_LR_LogReg_PAV(1:num_s);
        ln_LR_d_LogReg_PAV = ln_LR_LogReg_PAV(num_s+1:end);
        Cllr_LogReg_PAV(I_iter, I_dist_type) = cllr(ln_LR_s_LogReg_PAV, ln_LR_d_LogReg_PAV);
        
        % LogReg-devPAV
        % Same range restriction as for LDA-devPAV. Here the same-source and
        % different-source values are concatenated horizontally (with a
        % comma) rather than vertically as in the LDA case.
        range_min = min(ln_LR_s_LogReg);
        range_max = max(ln_LR_d_LogReg);
        ln_LR_LogReg = [ln_LR_s_LogReg, ln_LR_d_LogReg];
        II_range = (ln_LR_LogReg >= range_min) & (ln_LR_LogReg <= range_max);
        LogReg_devPAV(I_iter, I_dist_type) = devPAV(ln_LR_LogReg(II_range), ln_LR_LogReg_PAV(II_range));
        % devLogReg is an analogue of devPAV
        ln_LR_LogReg_LogReg = [ln_LR_s_LogReg_LogReg, ln_LR_d_LogReg_LogReg];
        devLogReg(I_iter, I_dist_type) = devPAV(ln_LR_LogReg(II_range), ln_LR_LogReg_LogReg(II_range));

    end
end

% Cllr_calibrated - Cllr_recalibrated
% Element-wise differences between the Cllr after the first calibration and
% the Cllr after recalibration (num_iter-by-2 arrays, one column per
% same-source distribution type).
Cllr_cal_recal_LDA_LDA          = Cllr_LDA      - Cllr_LDA_LDA;
Cllr_cal_recal_LDA_PAV          = Cllr_LDA      - Cllr_LDA_PAV;
Cllr_cal_recal_LogReg_logReg    = Cllr_LogReg   - Cllr_LogReg_LogReg;
Cllr_cal_recal_LogReg_PAV       = Cllr_LogReg   - Cllr_LogReg_PAV;


% Cllr_expected_Gauss
% Cllr of ln(LR) values computed directly from the generating densities
% (rather than from a trained model), using one large sample per source type.
num_expected = 500000; % sample size per source type

x_s_expected_Gauss = random('Normal', mu_s, sigma, [num_expected 1]);
x_d_expected_Gauss = random('Normal', mu_d, sigma, [num_expected 1]);

% numerator: density under the same-source model;
% denominator: density under the different-source model
L_s_expected_Gauss_numerator = pdf('Normal', x_s_expected_Gauss, mu_s, sigma);
L_s_expected_Gauss_denominator = pdf('Normal', x_s_expected_Gauss, mu_d, sigma);

L_d_expected_Gauss_numerator = pdf('Normal', x_d_expected_Gauss, mu_s, sigma);
L_d_expected_Gauss_denominator = pdf('Normal', x_d_expected_Gauss, mu_d, sigma);

% ln(LR) = ln(numerator) - ln(denominator)
ln_LR_s_expected_Gauss = log(L_s_expected_Gauss_numerator) - log(L_s_expected_Gauss_denominator);
ln_LR_d_expected_Gauss = log(L_d_expected_Gauss_numerator) - log(L_d_expected_Gauss_denominator);

Cllr_expected_Gauss = cllr(ln_LR_s_expected_Gauss, ln_LR_d_expected_Gauss);

% Cllr_expected_Gumbel
% Same-source scores are drawn as in the Monte Carlo loop (mu_s minus a
% 'Generalized Extreme Value' draw). The different-source sample drawn above
% (x_d_expected_Gauss) and its Gaussian denominator density are reused.
x_s_expected_Gumbel = mu_s - random('Generalized Extreme Value', 0, sigma, 0, [num_expected 1]);

% same-source density: 'Generalized Extreme Value' pdf evaluated at the
% negated scores with parameters (0, sigma, -mu_s)
L_s_expected_Gumbel_numerator = pdf('Generalized Extreme Value', -x_s_expected_Gumbel, 0, sigma, -mu_s);
L_d_expected_Gumbel_numerator = pdf('Generalized Extreme Value', -x_d_expected_Gauss, 0, sigma, -mu_s);

L_s_expected_Gumbel_denominator = pdf('Normal', x_s_expected_Gumbel, mu_d, sigma);

ln_LR_s_expected_Gumbel = log(L_s_expected_Gumbel_numerator) - log(L_s_expected_Gumbel_denominator);
ln_LR_d_expected_Gumbel = log(L_d_expected_Gumbel_numerator) - log(L_d_expected_Gauss_denominator);

Cllr_expected_Gumbel = cllr(ln_LR_s_expected_Gumbel, ln_LR_d_expected_Gumbel);

% report both expected values to three decimal places
fprintf('\nCllr_expected_Gauss:\t%0.3f\nCllr_expected_Gumbel:\t%0.3f\n', [Cllr_expected_Gauss, Cllr_expected_Gumbel])

% Cllr_calibrated - Cllr_expected
% Column 1 (Gaussian condition) is compared with Cllr_expected_Gauss and
% column 2 (Gumbel condition) with Cllr_expected_Gumbel. The 'PAV' entries
% use the LDA-PAV results (Cllr_LDA_PAV).
Cllr_cal_expect_LDA(:,1)    = Cllr_LDA(:,1)     - Cllr_expected_Gauss;
Cllr_cal_expect_LDA(:,2)    = Cllr_LDA(:,2)     - Cllr_expected_Gumbel;
Cllr_cal_expect_LogReg(:,1) = Cllr_LogReg(:,1)  - Cllr_expected_Gauss;
Cllr_cal_expect_LogReg(:,2) = Cllr_LogReg(:,2)  - Cllr_expected_Gumbel;
Cllr_cal_expect_PAV(:,1)    = Cllr_LDA_PAV(:,1) - Cllr_expected_Gauss;
Cllr_cal_expect_PAV(:,2)    = Cllr_LDA_PAV(:,2) - Cllr_expected_Gumbel;


% plot figures
% Figs 5-7 are each drawn by monte_carlo_plot from a num_rows-by-1 cell
% array: cell I_row holds one matrix whose columns are the metrics to be
% compared, taken from column I_row of the Monte Carlo result arrays
% (1 = Gaussian same-source, 2 = Gumbel same-source).
% NOTE(review): monte_carlo_plot is not defined in this file (presumably in
% ./functions/); how it treats an empty y_lim should be confirmed there.

% When set_y_lim is true, fixed y-axis limits are passed for each figure;
% otherwise empty limits are passed.
set_y_lim = true;
if set_y_lim
    y_lim_cal_expected = [-0.3 0.6];
    y_lim_cal_recal = [-0.1 0.4];
    y_lim_devPAV = [-0.1 1.6];
else
    y_lim_cal_expected = [];
    y_lim_cal_recal = [];
    y_lim_devPAV = [];
end

% Fig 5: cal-expected
h_fig = figure(5);
x_labels = {'LDA' 'LogReg' 'PAV'};
y_label = '\itC\rm_{llr} – \itC\rm_{llr}^{expect}';
y_lim = y_lim_cal_expected;
num_rows = 2;
metric_data = cell(num_rows,1);
for I_row = 1:num_rows
    metric_data{I_row} = [Cllr_cal_expect_LDA(:,I_row), Cllr_cal_expect_LogReg(:,I_row), Cllr_cal_expect_PAV(:,I_row)];
end
monte_carlo_plot(h_fig, metric_data, x_labels, y_lim, y_label);

% Fig 6: cal-recal
h_fig = figure(6);
x_labels = {'LDA-LDA' 'LDA-PAV' 'LogReg-LogReg' 'LogReg-PAV'};
% Assign the label text to y_label so that it is passed to monte_carlo_plot.
% (Calling ylabel(...) here instead would leave y_label holding the Fig 5
% text and would create a stray axes in the new, empty figure.)
y_label = '\itC\rm_{llr} – \itC\rm_{llr}^{recal}';
y_lim = y_lim_cal_recal;
num_rows = 2;
metric_data = cell(num_rows,1);
for I_row = 1:num_rows
    metric_data{I_row} = [Cllr_cal_recal_LDA_LDA(:,I_row), Cllr_cal_recal_LDA_PAV(:,I_row), Cllr_cal_recal_LogReg_logReg(:,I_row), Cllr_cal_recal_LogReg_PAV(:,I_row)];
end
monte_carlo_plot(h_fig, metric_data, x_labels, y_lim, y_label);

% Fig 7: devPAV
% Column order matches x_labels: devLDA, LDA devPAV, devLogReg, LogReg devPAV.
h_fig = figure(7);
x_labels = {'LDA-LDA' 'LDA-PAV' 'LogReg-LogReg' 'LogReg-PAV'};
y_label = 'devPAV / devLDA / devLogReg';
y_lim = y_lim_devPAV;
num_rows = 2;
metric_data = cell(num_rows,1);
for I_row = 1:num_rows
    metric_data{I_row} = [devLDA(:,I_row), LDA_devPAV(:,I_row), devLogReg(:,I_row), LogReg_devPAV(:,I_row)];
end
monte_carlo_plot(h_fig, metric_data, x_labels, y_lim, y_label);


% cleanup
% remove the helper-function folder that was added to the path at the top of
% the script
rmpath('./functions/');